{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from siuba import *\n",
    "from siuba import meta_hook\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "from pandas import DataFrame, Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = DataFrame({\n",
    "    \"repo\": [\"pandas\", \"dplyr\", \"ggplot2\", \"plotnine\"],\n",
    "    \"owner\": [\"pandas-dev\", \"tidyverse\", \"tidyverse\", \"has2k1\"],\n",
    "    \"language\": [\"python\", \"R\", \"R\", \"python\"],\n",
    "    \"stars\": [17800, 2800, 3500, 1450],\n",
    "    \"x\": [1,2,3,None]\n",
    "    })"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## mutate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pandas.core.groupby import DataFrameGroupBy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>repo</th>\n",
       "      <th>owner</th>\n",
       "      <th>language</th>\n",
       "      <th>stars</th>\n",
       "      <th>x</th>\n",
       "      <th>rel_stars1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>pandas</td>\n",
       "      <td>pandas-dev</td>\n",
       "      <td>python</td>\n",
       "      <td>17800</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>dplyr</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>R</td>\n",
       "      <td>2800</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ggplot2</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>R</td>\n",
       "      <td>3500</td>\n",
       "      <td>3.0</td>\n",
       "      <td>700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>plotnine</td>\n",
       "      <td>has2k1</td>\n",
       "      <td>python</td>\n",
       "      <td>1450</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       repo       owner language  stars    x  rel_stars1\n",
       "0    pandas  pandas-dev   python  17800  1.0           0\n",
       "1     dplyr   tidyverse        R   2800  2.0           0\n",
       "2   ggplot2   tidyverse        R   3500  3.0         700\n",
       "3  plotnine      has2k1   python   1450  NaN           0"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gdf = group_by(df, 'language', \"owner\")\n",
    "\n",
    "out = mutate(gdf, rel_stars1 = _.stars - _.stars.min())\n",
    "\n",
    "ungroup(out)\n",
    "\n",
    "df.siu_group_by(\"language\", \"owner\").siu_mutate(rel_stars1 = _.stars - _.stars.min()).siu_ungroup()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>repo</th>\n",
       "      <th>owner</th>\n",
       "      <th>language</th>\n",
       "      <th>stars</th>\n",
       "      <th>x</th>\n",
       "      <th>rel_stars1</th>\n",
       "      <th>rel_stars2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>pandas</td>\n",
       "      <td>pandas-dev</td>\n",
       "      <td>python</td>\n",
       "      <td>17800</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>35600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>dplyr</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>R</td>\n",
       "      <td>2800</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0</td>\n",
       "      <td>5600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ggplot2</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>R</td>\n",
       "      <td>3500</td>\n",
       "      <td>3.0</td>\n",
       "      <td>700</td>\n",
       "      <td>7000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>plotnine</td>\n",
       "      <td>has2k1</td>\n",
       "      <td>python</td>\n",
       "      <td>1450</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2900</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       repo       owner language  stars    x  rel_stars1  rel_stars2\n",
       "0    pandas  pandas-dev   python  17800  1.0           0       35600\n",
       "1     dplyr   tidyverse        R   2800  2.0           0        5600\n",
       "2   ggplot2   tidyverse        R   3500  3.0         700        7000\n",
       "3  plotnine      has2k1   python   1450  NaN           0        2900"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ungroup(mutate(out, rel_stars2 = _.stars + _.stars))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>repo</th>\n",
       "      <th>owner</th>\n",
       "      <th>language</th>\n",
       "      <th>stars</th>\n",
       "      <th>x</th>\n",
       "      <th>rel_stars1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>pandas</td>\n",
       "      <td>pandas-dev</td>\n",
       "      <td>python</td>\n",
       "      <td>17800</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>dplyr</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>R</td>\n",
       "      <td>2800</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ggplot2</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>R</td>\n",
       "      <td>3500</td>\n",
       "      <td>3.0</td>\n",
       "      <td>700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>plotnine</td>\n",
       "      <td>has2k1</td>\n",
       "      <td>python</td>\n",
       "      <td>1450</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       repo       owner language  stars    x  rel_stars1\n",
       "0    pandas  pandas-dev   python  17800  1.0           0\n",
       "1     dplyr   tidyverse        R   2800  2.0           0\n",
       "2   ggplot2   tidyverse        R   3500  3.0         700\n",
       "3  plotnine      has2k1   python   1450  NaN           0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df \\\n",
    "  .siu_group_by(\"language\", \"owner\") \\\n",
    "  .siu_mutate(rel_stars1 = _.stars - _.stars.min()) \\\n",
    "  .siu_ungroup()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## filter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>repo</th>\n",
       "      <th>owner</th>\n",
       "      <th>language</th>\n",
       "      <th>stars</th>\n",
       "      <th>x</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>pandas</td>\n",
       "      <td>pandas-dev</td>\n",
       "      <td>python</td>\n",
       "      <td>17800</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ggplot2</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>R</td>\n",
       "      <td>3500</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      repo       owner language  stars    x\n",
       "0   pandas  pandas-dev   python  17800  1.0\n",
       "1  ggplot2   tidyverse        R   3500  3.0"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# TODO: change name filter to query?\n",
    "\n",
    "# regular filter\n",
    "filter(df, _.stars > 3000, _.stars < 15000)\n",
    "\n",
    "# grouped filter\n",
    "gdf = group_by(df, \"language\")\n",
    "\n",
    "ungroup(filter(gdf, _.stars != _.stars.min()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## summarize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>language</th>\n",
       "      <th>ttl_stars</th>\n",
       "      <th>wat</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>R</td>\n",
       "      <td>6300</td>\n",
       "      <td>2800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>python</td>\n",
       "      <td>19250</td>\n",
       "      <td>1450</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  language  ttl_stars   wat\n",
       "0        R       6300  2800\n",
       "1   python      19250  1450"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# summarize DataFrame\n",
    "summarize(df, min_stars = _.stars.min())\n",
    "\n",
    "# summarize grouped DataFrame\n",
    "gdf = group_by(df, \"language\")\n",
    "\n",
    "summarize(gdf, ttl_stars = _.stars.sum(), wat = _.stars.min())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## transmute"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>language</th>\n",
       "      <th>repo</th>\n",
       "      <th>rel_stars1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>python</td>\n",
       "      <td>pandas</td>\n",
       "      <td>16350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>R</td>\n",
       "      <td>dplyr</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>R</td>\n",
       "      <td>ggplot2</td>\n",
       "      <td>700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>python</td>\n",
       "      <td>plotnine</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  language      repo  rel_stars1\n",
       "0   python    pandas       16350\n",
       "1        R     dplyr           0\n",
       "2        R   ggplot2         700\n",
       "3   python  plotnine           0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "transmute(df, \"repo\", rel_stars1 = _.stars - _.stars.min())\n",
    "\n",
    "ungroup(transmute(gdf, \"repo\", rel_stars1 = _.stars - _.stars.min()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## select"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>y</th>\n",
       "      <th>repo</th>\n",
       "      <th>owner</th>\n",
       "      <th>stars</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>pandas</td>\n",
       "      <td>pandas-dev</td>\n",
       "      <td>17800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.0</td>\n",
       "      <td>dplyr</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>2800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3.0</td>\n",
       "      <td>ggplot2</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>3500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>plotnine</td>\n",
       "      <td>has2k1</td>\n",
       "      <td>1450</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     y      repo       owner  stars\n",
       "0  1.0    pandas  pandas-dev  17800\n",
       "1  2.0     dplyr   tidyverse   2800\n",
       "2  3.0   ggplot2   tidyverse   3500\n",
       "3  NaN  plotnine      has2k1   1450"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# thoughts:\n",
    "#  + can use dynamic values, e.g. colname == .x\n",
    "#  + if select implements some name class, then nothing magic happening\n",
    "#    e.g. _.y == _.x is equivalent to lambda cols: cols.y == cols.x\n",
    "#  - long winded (==, _.y seems harder to read than \"y\")\n",
    "# select(df, _.y == _.x, -_.language)\n",
    "\n",
    "select(df, _.y == _.x, -_.language)\n",
    "# considered alternative with strings. E.g...\n",
    "# select(df, \"y = x\", \"language\")\n",
    "# select(df, dict(y = \"x\"), \"language\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## arrange"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>repo</th>\n",
       "      <th>owner</th>\n",
       "      <th>language</th>\n",
       "      <th>stars</th>\n",
       "      <th>x</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>plotnine</td>\n",
       "      <td>has2k1</td>\n",
       "      <td>python</td>\n",
       "      <td>1450</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>dplyr</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>R</td>\n",
       "      <td>2800</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ggplot2</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>R</td>\n",
       "      <td>3500</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>pandas</td>\n",
       "      <td>pandas-dev</td>\n",
       "      <td>python</td>\n",
       "      <td>17800</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       repo       owner language  stars    x\n",
       "3  plotnine      has2k1   python   1450  NaN\n",
       "1     dplyr   tidyverse        R   2800  2.0\n",
       "2   ggplot2   tidyverse        R   3500  3.0\n",
       "0    pandas  pandas-dev   python  17800  1.0"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arrange(df, -_.owner, _.repo)\n",
    "\n",
    "arrange(df, _.owner.str.len())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## helpers (if_else, case_when)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['no', 'yeah', 'no', 'no'], dtype='<U4')"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "if_else(df.repo == \"dplyr\", \"yeah\", \"no\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['pandas', 'dplyr', 'wat', 'wat'], dtype=object)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "f = if_else(_.repo.str.contains(\"d\"), _.repo, \"wat\")\n",
    "\n",
    "f(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['incredible!', 'pretty good!', 'pretty good!', 'pretty good!'],\n",
       "      dtype='<U12')"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "case_when(df, {\n",
    "    _.stars > 10000: \"incredible!\",\n",
    "    _.stars > 1000: \"pretty good!\",\n",
    "    _.stars > 100 : \"keep going!\",\n",
    "    True: \"I don't know\"\n",
    "})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## nest and unnest "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>language</th>\n",
       "      <th>data</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>R</td>\n",
       "      <td>repo      owner  stars    x\n",
       "1    dplyr  ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>python</td>\n",
       "      <td>repo       owner  stars    x\n",
       "0    panda...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  language                                               data\n",
       "0        R        repo      owner  stars    x\n",
       "1    dplyr  ...\n",
       "1   python         repo       owner  stars    x\n",
       "0    panda..."
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# data column is an array of DataFrames\n",
    "nest(df, -_.language, key = \"data\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>language</th>\n",
       "      <th>repo</th>\n",
       "      <th>owner</th>\n",
       "      <th>stars</th>\n",
       "      <th>x</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>R</td>\n",
       "      <td>dplyr</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>2800</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>R</td>\n",
       "      <td>ggplot2</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>3500</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>python</td>\n",
       "      <td>pandas</td>\n",
       "      <td>pandas-dev</td>\n",
       "      <td>17800</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>python</td>\n",
       "      <td>plotnine</td>\n",
       "      <td>has2k1</td>\n",
       "      <td>1450</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  language      repo       owner  stars    x\n",
       "0        R     dplyr   tidyverse   2800  2.0\n",
       "1        R   ggplot2   tidyverse   3500  3.0\n",
       "2   python    pandas  pandas-dev  17800  1.0\n",
       "3   python  plotnine      has2k1   1450  NaN"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "unnest(nest(df, -_.language, key = \"data\"), \"data\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## count "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>language</th>\n",
       "      <th>owner</th>\n",
       "      <th>n</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>R</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>python</td>\n",
       "      <td>has2k1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>python</td>\n",
       "      <td>pandas-dev</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  language       owner  n\n",
       "0        R   tidyverse  2\n",
       "1   python      has2k1  1\n",
       "2   python  pandas-dev  1"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "count(df, \"language\", \"owner\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>repo</th>\n",
       "      <th>owner</th>\n",
       "      <th>language</th>\n",
       "      <th>stars</th>\n",
       "      <th>x</th>\n",
       "      <th>n</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>pandas</td>\n",
       "      <td>pandas-dev</td>\n",
       "      <td>python</td>\n",
       "      <td>17800</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>dplyr</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>R</td>\n",
       "      <td>2800</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ggplot2</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>R</td>\n",
       "      <td>3500</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>plotnine</td>\n",
       "      <td>has2k1</td>\n",
       "      <td>python</td>\n",
       "      <td>1450</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       repo       owner language  stars    x  n\n",
       "0    pandas  pandas-dev   python  17800  1.0  1\n",
       "1     dplyr   tidyverse        R   2800  2.0  2\n",
       "2   ggplot2   tidyverse        R   3500  3.0  2\n",
       "3  plotnine      has2k1   python   1450  NaN  1"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "add_count(df, \"language\", \"owner\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Distinct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>repo</th>\n",
       "      <th>owner</th>\n",
       "      <th>language</th>\n",
       "      <th>stars</th>\n",
       "      <th>x</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>pandas</td>\n",
       "      <td>pandas-dev</td>\n",
       "      <td>python</td>\n",
       "      <td>17800</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>dplyr</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>R</td>\n",
       "      <td>2800</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     repo       owner language  stars    x\n",
       "0  pandas  pandas-dev   python  17800  1.0\n",
       "1   dplyr   tidyverse        R   2800  2.0"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "distinct(df, _.language, _keep_all = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>language</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>python</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>R</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  language\n",
       "0   python\n",
       "1        R"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "distinct(df, \"language\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>lang2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>python</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>r</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    lang2\n",
       "0  python\n",
       "1       r"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "distinct(df, lang2 = _.language.str.lower())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>lang2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>R</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>PYTHON</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    lang2\n",
       "0       R\n",
       "1  PYTHON"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gdf = group_by(df, \"language\")\n",
    "\n",
    "ungroup(distinct(gdf, lang2 = _.language.str.upper()))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Joins"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>repo</th>\n",
       "      <th>owner</th>\n",
       "      <th>language</th>\n",
       "      <th>stars</th>\n",
       "      <th>x</th>\n",
       "      <th>repo2</th>\n",
       "      <th>follow</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>pandas</td>\n",
       "      <td>pandas-dev</td>\n",
       "      <td>python</td>\n",
       "      <td>17800</td>\n",
       "      <td>1.0</td>\n",
       "      <td>pandas</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>dplyr</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>R</td>\n",
       "      <td>2800</td>\n",
       "      <td>2.0</td>\n",
       "      <td>dplyr</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ggplot2</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>R</td>\n",
       "      <td>3500</td>\n",
       "      <td>3.0</td>\n",
       "      <td>None</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>plotnine</td>\n",
       "      <td>has2k1</td>\n",
       "      <td>python</td>\n",
       "      <td>1450</td>\n",
       "      <td>NaN</td>\n",
       "      <td>None</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       repo       owner language  stars    x   repo2  follow\n",
       "0    pandas  pandas-dev   python  17800  1.0  pandas    True\n",
       "1     dplyr   tidyverse        R   2800  2.0   dplyr   False\n",
       "2   ggplot2   tidyverse        R   3500  3.0    None    True\n",
       "3  plotnine      has2k1   python   1450  NaN    None   False"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "follow = pd.DataFrame({\n",
    "    'repo': ['pandas', 'dplyr', 'ggplot2', 'plotnine'],\n",
    "    'repo2': ['pandas', 'dplyr', None, None],\n",
    "    'follow': [True, False, True, False]\n",
    "})\n",
    "\n",
    "join(df, follow, how = \"inner\", on = \"repo\")\n",
    "# join(df, df)\n",
    "\n",
    "join(df, follow, how = \"inner\", on = {\"repo\": \"repo2\"})\n",
    "\n",
    "left_join(df, follow, \"repo\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Spread and Gather"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>repo</th>\n",
       "      <th>owner</th>\n",
       "      <th>x</th>\n",
       "      <th>R</th>\n",
       "      <th>python</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>dplyr</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2800.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ggplot2</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3500.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>pandas</td>\n",
       "      <td>pandas-dev</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17800.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>plotnine</td>\n",
       "      <td>has2k1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1450.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       repo       owner    x       R   python\n",
       "0     dplyr   tidyverse  2.0  2800.0      NaN\n",
       "1   ggplot2   tidyverse  3.0  3500.0      NaN\n",
       "2    pandas  pandas-dev  1.0     NaN  17800.0\n",
       "3  plotnine      has2k1  NaN     NaN   1450.0"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "df2 = spread(df, 'language', 'stars')\n",
    "df2\n",
    "# this should raise an error, because duplicate id col x key combos\n",
    "#spread(pd.concat([df, df]), 'language', 'stars')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>repo</th>\n",
       "      <th>owner</th>\n",
       "      <th>x</th>\n",
       "      <th>key</th>\n",
       "      <th>value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>dplyr</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>2.0</td>\n",
       "      <td>R</td>\n",
       "      <td>2800.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ggplot2</td>\n",
       "      <td>tidyverse</td>\n",
       "      <td>3.0</td>\n",
       "      <td>R</td>\n",
       "      <td>3500.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>pandas</td>\n",
       "      <td>pandas-dev</td>\n",
       "      <td>1.0</td>\n",
       "      <td>python</td>\n",
       "      <td>17800.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>plotnine</td>\n",
       "      <td>has2k1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>python</td>\n",
       "      <td>1450.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       repo       owner    x     key    value\n",
       "0     dplyr   tidyverse  2.0       R   2800.0\n",
       "1   ggplot2   tidyverse  3.0       R   3500.0\n",
       "2    pandas  pandas-dev  1.0  python  17800.0\n",
       "3  plotnine      has2k1  NaN  python   1450.0"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gather(df2, \"key\", \"value\", \"R\", \"python\", drop_na = True)\n",
    "\n",
    "# TODO\n",
    "# gather(df2, _.key, _.value, _[\"R\":\"python\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Piping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'x is: 3'"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "f = Pipeable(f = lambda x: x + 1) >> Pipeable(f = lambda x: \"x is: {}\".format(x))\n",
    "\n",
    "f(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>repo</th>\n",
       "      <th>owner</th>\n",
       "      <th>language</th>\n",
       "      <th>stars</th>\n",
       "      <th>x</th>\n",
       "      <th>new_repo</th>\n",
       "      <th>case</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>pandas</td>\n",
       "      <td>pandas-dev</td>\n",
       "      <td>python</td>\n",
       "      <td>17800</td>\n",
       "      <td>1.0</td>\n",
       "      <td>pandas waattt</td>\n",
       "      <td>aw yeah</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     repo       owner language  stars    x       new_repo     case\n",
       "0  pandas  pandas-dev   python  17800  1.0  pandas waattt  aw yeah"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(df\n",
    "  >> mutate(\n",
    "       new_repo = _.repo + \" waattt\",\n",
    "       case = case_when(_, {_.language == \"python\": \"aw yeah\", True: 'wat'})\n",
    "     )\n",
    "  >> filter(_.stars > 5000)\n",
    "  )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>language</th>\n",
       "      <th>wat</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>R</td>\n",
       "      <td>3150.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>python</td>\n",
       "      <td>9625.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  language     wat\n",
       "0        R  3150.0\n",
       "1   python  9625.0"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df >> group_by(_.language) >> summarize(wat = _.stars.mean())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "222px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
